PCANet: Learning diffusivity (m) to solution (u) map for the Poisson problem¶

The data is located in the ../data directory, and the key data of interest is in the Poisson_samples.npz file.

On data¶

The Dropbox folder NeuralOperator_Survey_Shared_Data_March2025 contains the key data to reproduce the results in the survey paper.

If you did not generate data by running survey_work/problems/poisson/Poisson.ipynb, consider copying the contents of dropbox folder NeuralOperator_Survey_Shared_Data_March2025/survey_work/problems/poisson/data/ into survey_work/problems/poisson/data/ before running this notebook.

Results¶

The figure below shows the neural operator predictions for different samples of the test input.

No description has been provided for this image

In [1]:
import sys
import os

import numpy as np # load numpy before torch
import torch


# Make the project-local plotting, data, and network modules importable.
src_path = "../../src/"
sys.path.append(src_path + 'plotting/')
from field_plot import field_plot
from plot_loss import plot_loss
from plot_svd import plot_s_vec_values

sys.path.append(src_path + 'data/')
from dataMethods import DataProcessor

sys.path.append(src_path + 'nn/pcanet/')
sys.path.append(src_path + 'nn/mlp/') # need this here so that PCANet can be imported (it imports MLP)
from torch_pcanet import PCANet
import uq

import matplotlib.pyplot as plt
from mpl_toolkits.axes_grid1 import make_axes_locatable
plt.rcParams['text.latex.preamble'] = r'\usepackage{amsmath}'

# set seed
# Seed both numpy and torch so sample selection and training are reproducible.
seed = 0
np.random.seed(seed)
torch.manual_seed(seed)
# NOTE(review): roc_auc_score/roc_curve appear unused in this notebook —
# confirm before removing; import also sits below the config code, unlike
# the other imports at the top of this cell.
from sklearn.metrics import roc_auc_score, roc_curve
In [2]:
# Directory holding the input sample files; results/figures are written
# alongside the data.
data_folder = '../../../autodl-tmp/data/'
results_dir = data_folder
# exist_ok=True replaces the check-then-create pattern: it is atomic with
# respect to concurrent creation and a no-op if the directory exists.
os.makedirs(results_dir, exist_ok=True)

Load data¶

In [3]:
# Train/test split sizes drawn from the sample file.
num_train = 3500
num_test = 1000

num_inp_fn_points = 2601 # number of grid points for the input function
num_out_fn_points = 2601 # number of evaluations points for the output function
num_Y_components = 1 # scalar field
num_inp_red_dim = 100 # number of reduced dimensions for the input data
num_out_red_dim = 100 # number of reduced dimensions for the output data
out_coordinate_dimension = 2 # domain for output function is 2D

# training hyperparameters
batch_size = 20
epochs = 1000
lr = 1.0e-3
act_fn = torch.relu

data_prefix = 'Poisson'

# DataProcessor loads the .npz samples and (per the printed shapes below)
# reduces both input (m) and output (u) fields to num_*_red_dim coefficients
# via the SVD projectors it exposes.
data = DataProcessor(data_folder + data_prefix + '_samples_no-ood.npz', num_train, num_test, num_inp_fn_points, num_out_fn_points, num_Y_components, num_inp_red_dim, num_out_red_dim)
# NOTE(review): the test dict deliberately reuses the 'X_train'/'Y_train' key
# names for the *test* tensors — this is the key layout the downstream
# train/UQ routines expect; do not "fix" the names without checking them.
train_data = {'X_train': data.X_train, 'X_trunk': data.X_trunk, 'Y_train': data.Y_train}
test_data = {'X_train': data.X_test, 'X_trunk': data.X_trunk, 'Y_train': data.Y_test}

print('X_train:',data.X_train.shape)
print('Y_train:',data.Y_train.shape)
print('X_test:',data.X_test.shape)
print('Y_test:',data.Y_test.shape)
print('X_trunk:',data.X_trunk.shape)

print('X_train_svd_projector:',data.X_train_svd_projector.shape)
print('Y_train_svd_projector:',data.Y_train_svd_projector.shape)
X_train: (3500, 100)
Y_train: (3500, 100)
X_test: (1000, 100)
Y_test: (1000, 100)
X_trunk: (2601, 2)
X_train_svd_projector: (100, 2601)
Y_train_svd_projector: (100, 2601)

Plot singular values¶

In [4]:
# Placement of the annotation inset and the (x, y) region it zooms into.
plot_annot_xy = [0.15, 0.35, 0.8, 0.5]
plot_annot_xy_region = [-5, 300, -0.04, 0.15]

# One [text-offset, arrow-offset] pair per annotated point.
xy_text_vec = [
    [(10, 20), (-20, -25)],
    [(5, -5), (-20, -25)],
    [(-20, 15), (-20, -25)],
]
l_style_vec = ['-', '-']

# Singular-value decay of the input (m) and output (u) training data.
plot_s_vec_values(
    [data.X_train_s_values, data.Y_train_s_values],
    [num_inp_red_dim, num_out_red_dim],
    ['m', 'u'],
    l_style_vec,
    xy_text_vec,
    plot_annot_xy,
    plot_annot_xy_region,
    results_dir + data_prefix + '_svd_analysis_m_u',
)
j = 0, i = 0, index = 100, index_val = 0.032590066211883206
j = 0, i = 1, index = 100, index_val = 0.0033409165710523744
j = 1, i = 0, index = 35, index_val = 0.1006803895025745
j = 1, i = 1, index = 10, index_val = 0.10902619155367585
j = 2, i = 0, index = 289, index_val = 0.01000805121239042
j = 2, i = 1, index = 50, index_val = 0.009894120146498975
No description has been provided for this image

Create model and train the network¶

In [5]:
# MLP hyperparameters for the PCA-coefficient -> PCA-coefficient map.
num_layers = 4
num_neurons = 250

model_save_path = results_dir + 'PCANet/'
model_save_file = model_save_path + 'model.pkl'
os.makedirs(os.path.dirname(model_save_path), exist_ok=True)

# PCANet maps the reduced input coefficients to reduced output coefficients.
model = PCANet(num_layers, num_neurons, act_fn,
               num_inp_red_dim, num_out_red_dim,
               save_file=model_save_file)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

# Count only the parameters that will receive gradients.
n_trainable = sum(p.numel() for p in model.parameters() if p.requires_grad)
print('Number of trainable parameters: {}'.format(n_trainable))
Using device: cuda
Number of trainable parameters: 175850
In [6]:
# save the data and info
data_to_save = data.get_data_to_save()

# Everything needed to interpret or re-create this run, kept in one dict.
# Braces give implicit line continuation, so no backslashes are required.
model_metadata = {
    'data': data_to_save,
    'num_train': num_train,
    'num_test': num_test,
    'num_inp_fn_points': num_inp_fn_points,
    'num_out_fn_points': num_out_fn_points,
    'num_Y_components': num_Y_components,
    'out_coordinate_dimension': out_coordinate_dimension,
    'num_inp_red_dim': num_inp_red_dim,
    'num_out_red_dim': num_out_red_dim,
    'num_layers': num_layers,
    'num_neurons': num_neurons,
    'epochs': epochs,
    'batch_size': batch_size,
    'lr': lr,
}

# attach it to the model so it is serialized together with the weights
model.metadata = model_metadata
In [7]:
# Train
# NOTE(review): `train` here is PCANet's custom fitting routine (it takes
# data dicts and optimizer settings), shadowing nn.Module.train — later
# cells call torch.nn.Module.train(model) explicitly to toggle train mode.
model.train(train_data, test_data, batch_size=batch_size, \
            epochs = epochs, lr = lr, \
            save_model = True, save_epoch = 100)
--------------------------------------------------
Starting training with 175850 trainable parameters...
--------------------------------------------------
--------------------------------------------------
Epoch:     1, Train Loss (l2 squared): 5.530e+00, Test Loss (l2 squared): 1.641e+00, Time (sec): 0.356
--------------------------------------------------
--------------------------------------------------
Epoch:   100, Train Loss (l2 squared): 1.245e-01, Test Loss (l2 squared): 2.648e-01, Time (sec): 0.259
--------------------------------------------------
--------------------------------------------------
Model parameters saved at epoch 100
--------------------------------------------------
--------------------------------------------------
Epoch:   200, Train Loss (l2 squared): 6.100e-02, Test Loss (l2 squared): 2.165e-01, Time (sec): 0.207
--------------------------------------------------
--------------------------------------------------
Model parameters saved at epoch 200
--------------------------------------------------
--------------------------------------------------
Epoch:   300, Train Loss (l2 squared): 4.109e-02, Test Loss (l2 squared): 2.039e-01, Time (sec): 0.242
--------------------------------------------------
--------------------------------------------------
Model parameters saved at epoch 300
--------------------------------------------------
--------------------------------------------------
Epoch:   400, Train Loss (l2 squared): 3.341e-02, Test Loss (l2 squared): 2.023e-01, Time (sec): 0.152
--------------------------------------------------
--------------------------------------------------
Model parameters saved at epoch 400
--------------------------------------------------
--------------------------------------------------
Epoch:   500, Train Loss (l2 squared): 2.995e-02, Test Loss (l2 squared): 2.025e-01, Time (sec): 0.194
--------------------------------------------------
--------------------------------------------------
Model parameters saved at epoch 500
--------------------------------------------------
--------------------------------------------------
Epoch:   600, Train Loss (l2 squared): 2.836e-02, Test Loss (l2 squared): 2.028e-01, Time (sec): 0.252
--------------------------------------------------
--------------------------------------------------
Model parameters saved at epoch 600
--------------------------------------------------
--------------------------------------------------
Epoch:   700, Train Loss (l2 squared): 2.760e-02, Test Loss (l2 squared): 2.031e-01, Time (sec): 0.211
--------------------------------------------------
--------------------------------------------------
Model parameters saved at epoch 700
--------------------------------------------------
--------------------------------------------------
Epoch:   800, Train Loss (l2 squared): 2.717e-02, Test Loss (l2 squared): 2.033e-01, Time (sec): 0.291
--------------------------------------------------
--------------------------------------------------
Model parameters saved at epoch 800
--------------------------------------------------
--------------------------------------------------
Epoch:   900, Train Loss (l2 squared): 2.695e-02, Test Loss (l2 squared): 2.034e-01, Time (sec): 0.219
--------------------------------------------------
--------------------------------------------------
Model parameters saved at epoch 900
--------------------------------------------------
--------------------------------------------------
Epoch:  1000, Train Loss (l2 squared): 2.684e-02, Test Loss (l2 squared): 2.035e-01, Time (sec): 0.202
--------------------------------------------------
--------------------------------------------------
Model parameters saved at epoch 1000
--------------------------------------------------
--------------------------------------------------
Train time: 224.328, Epochs:  1000, Batch Size:    20, Final Train Loss (l2 squared): 2.684e-02, Final Test Loss (l2 squared): 2.035e-01
--------------------------------------------------
In [8]:
## Plotting the loss history

# The first column of each loss log is plotted; parentheses give implicit
# continuation, so no backslashes are needed.
train_hist = model.train_loss_log[:, 0]
test_hist = model.test_loss_log[:, 0]
plot_loss(train_hist,
          test_hist,
          fs=14, lw=2,
          savefile=results_dir + 'loss_his.png',
          figsize=[6, 6])
No description has been provided for this image

Test and plot the output of network¶

In [9]:
# load the model
# SECURITY NOTE: weights_only=False unpickles arbitrary objects — only safe
# here because the checkpoint was produced by this notebook.
model = torch.load(model_save_file, weights_only=False)

# The save path is built with '/' separators, so split on '/' directly.
# Splitting on os.path.sep breaks on Windows (sep='\\' never matches, giving
# a one-element list and an IndexError on sfname[-2]).
sfname = model_save_file.split('/')
print('-'*50)
print('Model loaded from: {}'.format(sfname[-2] + '/' + sfname[-1]))
print('\n' + '-'*50)
print('Model metadata:', model.metadata.keys())
print('\n' + '-'*50)
print('Model:', model)
--------------------------------------------------
Model loaded from: PCANet/model.pkl

--------------------------------------------------
Model metadata: dict_keys(['data', 'num_train', 'num_test', 'num_inp_fn_points', 'num_out_fn_points', 'num_Y_components', 'out_coordinate_dimension', 'num_inp_red_dim', 'num_out_red_dim', 'num_layers', 'num_neurons', 'epochs', 'batch_size', 'lr'])

--------------------------------------------------
Model: PCANet(
  (net): MLP(
    (layers): ModuleList(
      (0): Linear(in_features=100, out_features=250, bias=True)
      (1-2): 2 x Linear(in_features=250, out_features=250, bias=True)
      (3): Linear(in_features=250, out_features=100, bias=True)
    )
  )
)
In [10]:
# Evaluate the trained map on the held-out inputs (still in reduced space).
Y_test = test_data['Y_train']
Y_test_pred = model.predict(test_data['X_train']).detach().cpu().numpy()
print('test_out shape: {}, test_pred shape: {}'.format(Y_test.shape, Y_test_pred.shape))

# Per-sample relative l2 error over the reduced (PCA) coefficients.
residual_norms = np.linalg.norm(Y_test - Y_test_pred, axis=1)
truth_norms = np.linalg.norm(Y_test, axis=1)
error = residual_norms / truth_norms
print('Num tests: {:5d}, Mean Loss (rel l2): {:.3e}, Std Loss (rel l2): {:.3e}'.format(num_test, np.mean(error), np.std(error)))
test_out shape: (1000, 100), test_pred shape: (1000, 100)
Num tests:  1000, Mean Loss (rel l2): 1.060e-01, Std Loss (rel l2): 3.565e-02
In [11]:
def apply_dirichlet_bc(u, bc_value, bc_node_ids):
    """Impose a Dirichlet boundary condition on a nodal field vector.

    Mutates ``u`` in place: every entry selected by ``bc_node_ids``
    (integer fancy indexing) is overwritten with ``bc_value``. The same
    array is returned so the call composes with an assignment.
    """
    boundary = bc_node_ids
    u[boundary] = bc_value
    return u
In [12]:
# 4x4 grid: each row is one randomly chosen test sample; the columns show
# m, u_true, u_pred, and the pointwise difference.
rows, cols = 4, 4
fs = 20
fig, axs = plt.subplots(rows, cols, figsize=(16, 13))

decode = True  # map reduced (PCA) coefficients back to full nodal fields
apply_dirichlet_bc_flag = True  # clamp predicted boundary values to 0

# row: m, u_true, u_pred, u_diff
u_tags = [r'$m$', r'$u_{true}$', r'$u_{pred}$', r'$u_{true} - u_{pred}$']
cmaps = ['jet', 'viridis', 'viridis', 'hot']

nodes = data.X_trunk 

# randomly choose rows number of samples
i_choices = np.random.choice(num_test, rows, replace=False)

for i in range(rows):
    
    i_plot = i_choices[i]

    i_pred = Y_test_pred[i_plot]
    i_truth = Y_test[i_plot]
    i_m_test = data.X_test[i_plot]
    if decode:
        # decoder_X/decoder_Y reconstruct nodal fields from the reduced
        # coefficients using the stored SVD projectors.
        i_pred = data.decoder_Y(i_pred)
        i_truth = data.decoder_Y(i_truth)
        i_m_test = data.decoder_X(i_m_test)
    if apply_dirichlet_bc_flag:
        i_pred = apply_dirichlet_bc(i_pred, 0.0, data.u_mesh_dirichlet_boundary_nodes)
        # verify for i_truth
        if np.abs(i_truth[data.u_mesh_dirichlet_boundary_nodes]).max() > 1.0e-9:
            print('Warning: Dirichlet BC not applied to i_truth. Err : {}'.format(np.abs(i_truth[data.u_mesh_dirichlet_boundary_nodes]).max()))
            
    # Relative l2 error in the full (decoded) nodal space for this sample.
    i_diff = i_pred - i_truth
    i_diff_norm = np.linalg.norm(i_diff) / np.linalg.norm(i_truth)
    print('i_plot = {:5d}, error (rel l2): {:.3e}'.format(i_plot, i_diff_norm))

    uvec = [i_m_test, i_truth, i_pred, i_diff]
    
    for j in range(cols):
        
        cbar = field_plot(axs[i,j], uvec[j], nodes, cmap = cmaps[j])

        # Attach a colorbar axis of fixed relative width next to each panel.
        divider = make_axes_locatable(axs[i,j])
        cax = divider.append_axes('right', size='8%', pad=0.03)
        cax.tick_params(labelsize=fs)

        if j == 0 or j == cols - 1:
            # format cbar ticks
            kfmt = lambda x, pos: "{:g}".format(x)
            
            cbar = fig.colorbar(cbar, cax=cax, orientation='vertical', format = kfmt)
        else:
            cbar = fig.colorbar(cbar, cax=cax, orientation='vertical')

        if i == 0 and j < cols - 1:
            axs[i,j].set_title(u_tags[j], fontsize=fs)
        
        if j == cols - 1:
            # Last column's title carries the per-sample relative error.
            err_str = 'err (rel l2): {:.3f}%'.format(i_diff_norm*100)
            if i == 0:
                err_str = u_tags[j] + '\n' + err_str
            axs[i,j].set_title(err_str, fontsize=fs)

        axs[i,j].axis('off')

fig.tight_layout()
fig.suptitle('Poisson problem: Compare neural operator predictions ({})'.format(model.name), fontsize=1.25*fs, y=1.025)
fig.savefig(results_dir+'neural_operator_prediction_comparison.png',  bbox_inches='tight')
plt.show()
i_plot =   801, error (rel l2): 7.305e-03
i_plot =   311, error (rel l2): 9.159e-03
i_plot =    85, error (rel l2): 7.269e-03
i_plot =   435, error (rel l2): 7.810e-03
No description has been provided for this image

HMC¶

In [7]:
# Reload the deterministic MAP model as the starting point for HMC.
model = torch.load(model_save_file, weights_only=False)
model.to(device)

# Use a subset of test data for HMC (full dataset may be too slow)
# NOTE(review): num_hmc_samples_data == num_test == 1000, so this "subset"
# is actually the full test set. Also, conditioning the posterior on *test*
# data (rather than training data) is unusual — confirm this is intended.
num_hmc_samples_data = 1000  # number of data points to use
hmc_indices = np.random.choice(num_test, num_hmc_samples_data, replace=False)

# For PCANet, we use the reduced-dimension inputs/outputs
x_hmc = test_data['X_train'][hmc_indices]
y_hmc = test_data['Y_train'][hmc_indices]

print(f"Using {num_hmc_samples_data} test samples for HMC")
print(f"Model has {sum(p.numel() for p in model.parameters())} parameters")

# Initialize from current model parameters
flat0 = uq.pack_params(model).to(device)
print(f"Initial parameter vector shape: {flat0.shape}")

# Create log probability function
# Note: PCANet uses X -> Y mapping directly (no trunk network)
log_prob = uq.make_log_prob_fn(model, x_hmc, y_hmc, 
                            noise_std=0.05, prior_std=1.0)

# Adaptive HMC settings
hmc_num_samples = 2000
hmc_burn_in = 200          # Increased burn-in for adaptation
hmc_adapt_steps = 150       # Steps to adapt step size
hmc_initial_step_size = 1e-7
hmc_leapfrog_steps = 20
hmc_target_accept = 0.75    # Target acceptance rate (65-80% is optimal)

print(f"\nAdaptive HMC Settings:")
print(f"  num_samples: {hmc_num_samples}")
print(f"  burn_in: {hmc_burn_in}")
print(f"  adapt_steps: {hmc_adapt_steps}")
print(f"  initial_step_size: {hmc_initial_step_size}")
print(f"  leapfrog_steps: {hmc_leapfrog_steps}")
print(f"  target_accept: {hmc_target_accept}")
print()

# Run HMC with dual-phase schedule: step-size adaptation, then burn-in,
# then sampling (see the uq module for the algorithm details).
hmcsamples, acc_rate, final_step_size, step_size_history = uq.hmc_adaptive(
    log_prob, 
    flat0.requires_grad_(True), 
    target_accept=hmc_target_accept,
    initial_step_size=hmc_initial_step_size, 
    leapfrog_steps=hmc_leapfrog_steps, 
    num_samples=hmc_num_samples, 
    burn_in=hmc_burn_in,
    adapt_steps=hmc_adapt_steps
)

print(f"\n{'='*60}")
print(f"Final Results:")
print(f"  Acceptance rate: {acc_rate:.3f} ({acc_rate*100:.1f}%)")
print(f"  Final step size: {final_step_size:.2e}")
print(f"  Collected {len(hmcsamples)} samples")
print(f"{'='*60}")
Using 1000 test samples for HMC
Model has 175850 parameters
Initial parameter vector shape: torch.Size([175850])

Adaptive HMC Settings:
  num_samples: 2000
  burn_in: 200
  adapt_steps: 150
  initial_step_size: 1e-07
  leapfrog_steps: 20
  target_accept: 0.75

Starting adaptive HMC with target acceptance rate: 75.00%
Adaptation will run for 150 iterations
Iter   50/2200: accept rate = 0.740, step_size = 1.38e-06, phase = adapting
Iter  100/2200: accept rate = 0.710, step_size = 1.62e-06, phase = adapting
Iter  150/2200: accept rate = 0.707, step_size = 6.19e-06, phase = adapting

>>> Adaptation complete! Final step size: 2.60e-06
>>> Acceptance rate during adaptation: 0.702

Iter  200/2200: accept rate = 0.735, step_size = 2.60e-06, phase = burn-in
Iter  250/2200: accept rate = 0.772, step_size = 2.60e-06, phase = sampling
Iter  300/2200: accept rate = 0.787, step_size = 2.60e-06, phase = sampling
Iter  350/2200: accept rate = 0.791, step_size = 2.60e-06, phase = sampling
Iter  400/2200: accept rate = 0.797, step_size = 2.60e-06, phase = sampling
Iter  450/2200: accept rate = 0.809, step_size = 2.60e-06, phase = sampling
Iter  500/2200: accept rate = 0.806, step_size = 2.60e-06, phase = sampling
Iter  550/2200: accept rate = 0.811, step_size = 2.60e-06, phase = sampling
Iter  600/2200: accept rate = 0.813, step_size = 2.60e-06, phase = sampling
Iter  650/2200: accept rate = 0.818, step_size = 2.60e-06, phase = sampling
Iter  700/2200: accept rate = 0.826, step_size = 2.60e-06, phase = sampling
Iter  750/2200: accept rate = 0.832, step_size = 2.60e-06, phase = sampling
Iter  800/2200: accept rate = 0.836, step_size = 2.60e-06, phase = sampling
Iter  850/2200: accept rate = 0.840, step_size = 2.60e-06, phase = sampling
Iter  900/2200: accept rate = 0.846, step_size = 2.60e-06, phase = sampling
Iter  950/2200: accept rate = 0.854, step_size = 2.60e-06, phase = sampling
Iter 1000/2200: accept rate = 0.857, step_size = 2.60e-06, phase = sampling
Iter 1050/2200: accept rate = 0.861, step_size = 2.60e-06, phase = sampling
Iter 1100/2200: accept rate = 0.863, step_size = 2.60e-06, phase = sampling
Iter 1150/2200: accept rate = 0.867, step_size = 2.60e-06, phase = sampling
Iter 1200/2200: accept rate = 0.873, step_size = 2.60e-06, phase = sampling
Iter 1250/2200: accept rate = 0.874, step_size = 2.60e-06, phase = sampling
Iter 1300/2200: accept rate = 0.875, step_size = 2.60e-06, phase = sampling
Iter 1350/2200: accept rate = 0.876, step_size = 2.60e-06, phase = sampling
Iter 1400/2200: accept rate = 0.878, step_size = 2.60e-06, phase = sampling
Iter 1450/2200: accept rate = 0.879, step_size = 2.60e-06, phase = sampling
Iter 1500/2200: accept rate = 0.881, step_size = 2.60e-06, phase = sampling
Iter 1550/2200: accept rate = 0.884, step_size = 2.60e-06, phase = sampling
Iter 1600/2200: accept rate = 0.886, step_size = 2.60e-06, phase = sampling
Iter 1650/2200: accept rate = 0.888, step_size = 2.60e-06, phase = sampling
Iter 1700/2200: accept rate = 0.889, step_size = 2.60e-06, phase = sampling
Iter 1750/2200: accept rate = 0.889, step_size = 2.60e-06, phase = sampling
Iter 1800/2200: accept rate = 0.891, step_size = 2.60e-06, phase = sampling
Iter 1850/2200: accept rate = 0.893, step_size = 2.60e-06, phase = sampling
Iter 1900/2200: accept rate = 0.895, step_size = 2.60e-06, phase = sampling
Iter 1950/2200: accept rate = 0.895, step_size = 2.60e-06, phase = sampling
Iter 2000/2200: accept rate = 0.896, step_size = 2.60e-06, phase = sampling
Iter 2050/2200: accept rate = 0.898, step_size = 2.60e-06, phase = sampling
Iter 2100/2200: accept rate = 0.900, step_size = 2.60e-06, phase = sampling
Iter 2150/2200: accept rate = 0.901, step_size = 2.60e-06, phase = sampling
Iter 2200/2200: accept rate = 0.902, step_size = 2.60e-06, phase = sampling

============================================================
Final Results:
  Acceptance rate: 0.902 (90.2%)
  Final step size: 2.60e-06
  Collected 2000 samples
============================================================
In [8]:
# A fresh MAP model is reloaded before each call, presumably because the
# uq routines overwrite the network parameters with the HMC samples —
# confirm against the uq module.
# NOTE(review): num_test=1000 is passed but the output reports evaluating
# on 200 samples; check how uqevaluation interprets its first argument.
model = torch.load(model_save_file, weights_only=False)
model.to(device)
std1, result1 = uq.uqevaluation(num_test, test_data, model, data, method='hmc', hmcsamples=hmcsamples.clone())
model = torch.load(model_save_file, weights_only=False)
model.to(device)
uq.plot_uq(num_test,test_data,model,data,method='hmc',hmcsamples=hmcsamples.clone())
Evaluating uncertainty on 200 test samples...
Computing posterior predictions...

============================================================
Uncertainty Quality Metrics
============================================================

1. PREDICTION ACCURACY:
   RMSE: 0.017632
   MAE:  0.011963
   Mean Relative L2 Error: 0.59%
   Std Relative L2 Error:  0.09%

2. CALIBRATION (Coverage Analysis):
   Coverage within 1σ: 98.5% (ideal: 68.3%)
   Coverage within 2σ: 99.8% (ideal: 95.4%)
   Coverage within 3σ: 100.0% (ideal: 99.7%)
   Status: OVER-CONFIDENT (uncertainties too small)

3. SHARPNESS (Uncertainty Magnitude):
   Mean Epistemic σ: 0.002522
   Mean Total σ:     0.050118
   Mean Aleatoric σ: 0.050000 (fixed)

4. UNCERTAINTY-ERROR CORRELATION:
   Pearson correlation: 0.503
   → Good! High uncertainty correlates with high error

5. UNCERTAINTY DECOMPOSITION:
   Epistemic fraction: 0.5%
   Aleatoric fraction: 99.5%
   → Data noise dominates (model is confident)

6. PROPER SCORING RULES:
   Negative Log-Likelihood: -2.0141
   (Lower is better)

============================================================
Computing HMC predictions for 5 visualization samples...
No description has been provided for this image

Monte-Carlo Dropout¶

In [9]:
import importlib
importlib.reload(uq)
# Function to enable dropout during inference
model = torch.load(model_save_file, weights_only=False)
uq.inject_dropout(model)
# PCANet shadows nn.Module.train with its custom training loop (see the
# earlier training cell), so the base-class method is called explicitly to
# switch the module into train mode and keep dropout active at inference.
torch.nn.Module.train(model)
for module in model.modules():
    if isinstance(module, torch.nn.Dropout):
        torch.nn.Module.train(module)
In [10]:
# MC Dropout: repeated stochastic forward passes through the dropout-enabled
# model give the epistemic uncertainty estimate.
std2, result2 = uq.uqevaluation(num_test, test_data, model, data, method='mcd')
uq.plot_uq(num_test, test_data, model, data, method='mcd')
Evaluating uncertainty on 200 test samples...

============================================================
Uncertainty Quality Metrics
============================================================

1. PREDICTION ACCURACY:
   RMSE: 0.033399
   MAE:  0.022505
   Mean Relative L2 Error: 1.06%
   Std Relative L2 Error:  0.35%

2. CALIBRATION (Coverage Analysis):
   Coverage within 1σ: 100.0% (ideal: 68.3%)
   Coverage within 2σ: 100.0% (ideal: 95.4%)
   Coverage within 3σ: 100.0% (ideal: 99.7%)
   Status: OVER-CONFIDENT (uncertainties too small)

3. SHARPNESS (Uncertainty Magnitude):
   Mean Epistemic σ: 0.151001
   Mean Total σ:     0.163596
   Mean Aleatoric σ: 0.050000 (fixed)

4. UNCERTAINTY-ERROR CORRELATION:
   Pearson correlation: 0.442
   → Moderate correlation

5. UNCERTAINTY DECOMPOSITION:
   Epistemic fraction: 77.6%
   Aleatoric fraction: 22.4%
   → Balanced uncertainty sources

6. PROPER SCORING RULES:
   Negative Log-Likelihood: -1.0255
   (Lower is better)

============================================================
No description has been provided for this image

Laplace Approximation¶

In [11]:
# Reload the original model (without dropout) for Laplace approximation
model = torch.load(model_save_file, weights_only=False)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

# Laplace approximation settings
noise_std_laplace = 0.05  # Assumed observation noise (same as HMC/MC Dropout for fair comparison)
prior_std_laplace = 1.0   # Prior standard deviation for weights

print(f"\nLaplace Approximation Settings:")
print(f"  Assumed noise std: {noise_std_laplace}")
print(f"  Prior std: {prior_std_laplace}")

# Use a subset of training data for computing the Hessian (full dataset may be too expensive)
num_laplace_data = min(500, num_train)
laplace_indices = np.random.choice(num_train, num_laplace_data, replace=False)

x_laplace = train_data['X_train'][laplace_indices]
y_laplace = train_data['Y_train'][laplace_indices]

print(f"Using {num_laplace_data} training samples for Hessian computation")
print(f"Model has {sum(p.numel() for p in model.parameters())} parameters")

print("\nComputing diagonal Hessian approximation...")
H_diag = uq.compute_diagonal_hessian(
    model, x_laplace, y_laplace,
    noise_std_laplace, prior_std_laplace, device, batch_size=10, sample_outputs_per_batch=30
)

# Posterior variance (diagonal approximation)
# σ²_posterior = 1 / H_diag
# IMPORTANT: Clip the posterior variance to prevent excessively large perturbations
max_posterior_var = 0.01 ** 2  # Maximum variance corresponds to std of 0.01
posterior_var = 1.0 / (H_diag + 1e-8)  # Raw posterior variance; 1e-8 guards against division by zero
posterior_var = torch.clamp(posterior_var, max=max_posterior_var)  # Clip to prevent huge perturbations

print(f"\nHessian diagonal statistics:")
print(f"  Mean H_diag: {H_diag.mean().item():.4e}")
print(f"  Max H_diag:  {H_diag.max().item():.4e}")
print(f"  Min H_diag:  {H_diag.min().item():.4e}")
print(f"  Mean posterior σ (clipped): {torch.sqrt(posterior_var).mean().item():.4e}")
print(f"  Max posterior σ (clipped):  {torch.sqrt(posterior_var).max().item():.4e}")

# Clean up Hessian computation variables
del x_laplace, y_laplace
# NOTE(review): conditional expression used as a statement — a plain
# `if torch.cuda.is_available(): torch.cuda.empty_cache()` would be clearer.
torch.cuda.empty_cache() if torch.cuda.is_available() else None

# ============================================================
# Sample from the Laplace posterior
# θ ~ N(θ_MAP, diag(1/H_diag))
# ============================================================
print("\nSampling from Laplace posterior...")

num_laplace_samples = 2000  # Number of posterior samples
theta_map = uq.pack_params(model).to(device)

# Compute posterior std once
posterior_std_vec = torch.sqrt(posterior_var)

# Sample from the posterior and store on CPU to save GPU memory
laplace_samples = []
for i in range(num_laplace_samples):
    # Sample: θ = θ_MAP + σ_posterior * ε, where ε ~ N(0, I)
    epsilon = torch.randn_like(theta_map)
    theta_sample = theta_map + posterior_std_vec * epsilon
    laplace_samples.append(theta_sample.cpu())
    del epsilon, theta_sample

laplace_samples = torch.stack(laplace_samples)
print(f"Generated {num_laplace_samples} posterior samples")

# Free memory from Hessian computation
del H_diag, posterior_var, posterior_std_vec
torch.cuda.empty_cache() if torch.cuda.is_available() else None
Laplace Approximation Settings:
  Assumed noise std: 0.05
  Prior std: 1.0
Using 500 training samples for Hessian computation
Model has 175850 parameters

Computing diagonal Hessian approximation...
  Processed 100/500 samples
  Processed 200/500 samples
  Processed 300/500 samples
  Processed 400/500 samples
  Processed 500/500 samples

Hessian diagonal statistics:
  Mean H_diag: 5.5446e+05
  Max H_diag:  9.7607e+07
  Min H_diag:  1.0000e+00
  Mean posterior σ (clipped): 3.5005e-03
  Max posterior σ (clipped):  1.0000e-02

Sampling from Laplace posterior...
Generated 2000 posterior samples
In [12]:
# Reload the clean MAP model before each Laplace-based call, mirroring the
# HMC evaluation cell, so every routine starts from unperturbed weights —
# presumably the uq routines mutate parameters in place; confirm in uq.
model = torch.load(model_save_file, weights_only=False)
model.to(device)
std3, result3 = uq.uqevaluation(num_test, test_data, model, data, method='la', lasamples=laplace_samples)
model = torch.load(model_save_file, weights_only=False)
model.to(device)
uq.plot_uq(num_test, test_data, model, data, method='la', lasamples=laplace_samples)
Evaluating uncertainty on 200 test samples...

============================================================
Uncertainty Quality Metrics
============================================================

1. PREDICTION ACCURACY:
   RMSE: 0.029732
   MAE:  0.019645
   Mean Relative L2 Error: 0.95%
   Std Relative L2 Error:  0.27%

2. CALIBRATION (Coverage Analysis):
   Coverage within 1σ: 93.2% (ideal: 68.3%)
   Coverage within 2σ: 99.0% (ideal: 95.4%)
   Coverage within 3σ: 99.8% (ideal: 99.7%)
   Status: OVER-CONFIDENT (uncertainties too small)

3. SHARPNESS (Uncertainty Magnitude):
   Mean Epistemic σ: 0.005489
   Mean Total σ:     0.050421
   Mean Aleatoric σ: 0.050000 (fixed)

4. UNCERTAINTY-ERROR CORRELATION:
   Pearson correlation: 0.581
   → Good! High uncertainty correlates with high error

5. UNCERTAINTY DECOMPOSITION:
   Epistemic fraction: 1.6%
   Aleatoric fraction: 98.4%
   → Data noise dominates (model is confident)

6. PROPER SCORING RULES:
   Negative Log-Likelihood: -1.9035
   (Lower is better)

============================================================
No description has been provided for this image
In [13]:
# Side-by-side table (and figure) comparing HMC, MC Dropout, and Laplace.
uq.comparison_uq(result1,result2,result3)
======================================================================
COMPARISON: HMC vs MC Dropout vs Laplace Approximation
======================================================================

Metric                             HMC   MC Dropout      Laplace      Ideal
-------------------------------------------------------------------------------------
RMSE                          0.017632     0.033399     0.029732      Lower
MAE                           0.011963     0.022505     0.019645      Lower
Mean Rel. L2 Error (%)            0.59         1.06         0.95      Lower
Coverage 1σ (%)                   98.5        100.0         93.2       68.3
Coverage 2σ (%)                   99.8        100.0         99.0       95.4
Coverage 3σ (%)                  100.0        100.0         99.8       99.7
Mean Epistemic σ              0.002522     0.151001     0.005489          -
Mean Total σ                  0.050118     0.163596     0.050421          -
Epistemic Fraction (%)             0.5         77.6          1.6          -
Uncertainty-Error Corr.          0.503        0.442        0.581     Higher
NLL                            -2.0141      -1.0255      -1.9035      Lower
No description has been provided for this image

Behaviour on OOD data¶

In [14]:
# Out-of-distribution samples: 100/600 train/test split from the *_ood.npz
# file; only the test split is used below. The dict again reuses the
# 'X_train'/'Y_train' key names for the test tensors (downstream convention).
datas = DataProcessor(data_folder + data_prefix + '_samples_ood.npz', 100, 600, num_inp_fn_points, num_out_fn_points, num_Y_components, num_inp_red_dim, num_out_red_dim)
ood_data = {'X_train': datas.X_test, 'X_trunk': datas.X_trunk, 'Y_train': datas.Y_test}    
In [15]:
# HMC-based UQ on the OOD test set, from a fresh MAP model.
# NOTE(review): 500 is passed as the count although 600 OOD test samples
# exist and the output reports evaluating 200 — confirm how uqevaluation
# interprets its first argument.
model = torch.load(model_save_file, weights_only=False)
model.to(device)
std4, result4 = uq.uqevaluation(500, ood_data, model, datas, method = 'hmc', hmcsamples=hmcsamples.clone())
Evaluating uncertainty on 200 test samples...
Computing posterior predictions...
============================================================
Uncertainty Quality Metrics
============================================================

1. PREDICTION ACCURACY:
   RMSE: 10.186084
   MAE:  5.552376
   Mean Relative L2 Error: 765.33%
   Std Relative L2 Error:  929.47%

2. CALIBRATION (Coverage Analysis):
   Coverage within 1σ: 17.1% (ideal: 68.3%)
   Coverage within 2σ: 26.2% (ideal: 95.4%)
   Coverage within 3σ: 32.9% (ideal: 99.7%)
   Status: UNDER-CONFIDENT (uncertainties too large)

3. SHARPNESS (Uncertainty Magnitude):
   Mean Epistemic σ: 0.016884
   Mean Total σ:     0.056963
   Mean Aleatoric σ: 0.050000 (fixed)

4. UNCERTAINTY-ERROR CORRELATION:
   Pearson correlation: 0.978
   → Good! High uncertainty correlates with high error

5. UNCERTAINTY DECOMPOSITION:
   Epistemic fraction: 12.6%
   Aleatoric fraction: 87.4%
   → Data noise dominates (model is confident)

6. PROPER SCORING RULES:
   Negative Log-Likelihood: 10949.2194
   (Lower is better)

============================================================
In [16]:
# Reload the trained model and evaluate OOD uncertainty with MC Dropout.
# NOTE(review): weights_only=False unpickles arbitrary objects — only safe for trusted files.
model = torch.load(model_save_file, weights_only=False)
model.to(device)
# Insert Dropout layers into the trained network so repeated stochastic
# forward passes give a spread of predictions.
uq.inject_dropout(model)
# NOTE(review): base-class call (torch.nn.Module.train) instead of model.train() —
# presumably to avoid invoking a shadowed/overridden train() on the model; confirm intent.
torch.nn.Module.train(model)
# Explicitly force every Dropout module into training mode so dropout
# stays active during the evaluation forward passes.
for module in model.modules():
    if isinstance(module, torch.nn.Dropout):
        torch.nn.Module.train(module)
std5, result5 = uq.uqevaluation(500, ood_data, model, datas, 'mcd')
Evaluating uncertainty on 200 test samples...

============================================================
Uncertainty Quality Metrics
============================================================

1. PREDICTION ACCURACY:
   RMSE: 10.444515
   MAE:  5.984565
   Mean Relative L2 Error: 827.39%
   Std Relative L2 Error:  918.46%

2. CALIBRATION (Coverage Analysis):
   Coverage within 1σ: 31.2% (ideal: 68.3%)
   Coverage within 2σ: 58.2% (ideal: 95.4%)
   Coverage within 3σ: 93.5% (ideal: 99.7%)
   Status: UNDER-CONFIDENT (uncertainties too large)

3. SHARPNESS (Uncertainty Magnitude):
   Mean Epistemic σ: 2.658173
   Mean Total σ:     2.666734
   Mean Aleatoric σ: 0.050000 (fixed)

4. UNCERTAINTY-ERROR CORRELATION:
   Pearson correlation: 0.983
   → Good! High uncertainty correlates with high error

5. UNCERTAINTY DECOMPOSITION:
   Epistemic fraction: 84.0%
   Aleatoric fraction: 16.0%
   → Model uncertainty dominates (more data may help)

6. PROPER SCORING RULES:
   Negative Log-Likelihood: 2.4249
   (Lower is better)

============================================================
In [17]:
# Reload the trained checkpoint and move it to the compute device.
# NOTE(review): weights_only=False unpickles arbitrary objects — only safe for
# checkpoints you created yourself.
model = torch.load(model_save_file, weights_only=False).to(device)
# Uncertainty evaluation on the OOD set, reusing the Laplace posterior samples drawn earlier.
std6, result6 = uq.uqevaluation(
    500, ood_data, model, datas, 'la', lasamples=laplace_samples.clone()
)
Evaluating uncertainty on 200 test samples...

============================================================
Uncertainty Quality Metrics
============================================================

1. PREDICTION ACCURACY:
   RMSE: 10.313079
   MAE:  5.781051
   Mean Relative L2 Error: 800.17%
   Std Relative L2 Error:  921.98%

2. CALIBRATION (Coverage Analysis):
   Coverage within 1σ: 16.9% (ideal: 68.3%)
   Coverage within 2σ: 26.1% (ideal: 95.4%)
   Coverage within 3σ: 32.5% (ideal: 99.7%)
   Status: UNDER-CONFIDENT (uncertainties too large)

3. SHARPNESS (Uncertainty Magnitude):
   Mean Epistemic σ: 0.062018
   Mean Total σ:     0.093024
   Mean Aleatoric σ: 0.050000 (fixed)

4. UNCERTAINTY-ERROR CORRELATION:
   Pearson correlation: 1.000
   → Good! High uncertainty correlates with high error

5. UNCERTAINTY DECOMPOSITION:
   Epistemic fraction: 34.4%
   Aleatoric fraction: 65.6%
   → Balanced uncertainty sources

6. PROPER SCORING RULES:
   Negative Log-Likelihood: 1611.6868
   (Lower is better)

============================================================
In [18]:
# Side-by-side metric table for the three UQ methods on the OOD set
# (HMC vs MC Dropout vs Laplace approximation).
uq.comparison_uq(result4, result5, result6)
======================================================================
COMPARISON: HMC vs MC Dropout vs Laplace Approximation
======================================================================

Metric                             HMC   MC Dropout      Laplace      Ideal
-------------------------------------------------------------------------------------
RMSE                         10.186084    10.444515    10.313079      Lower
MAE                           5.552376     5.984565     5.781051      Lower
Mean Rel. L2 Error (%)          765.33       827.39       800.17      Lower
Coverage 1σ (%)                   17.1         31.2         16.9       68.3
Coverage 2σ (%)                   26.2         58.2         26.1       95.4
Coverage 3σ (%)                   32.9         93.5         32.5       99.7
Mean Epistemic σ              0.016884     2.658173     0.062018          -
Mean Total σ                  0.056963     2.666734     0.093024          -
Epistemic Fraction (%)            12.6         84.0         34.4          -
Uncertainty-Error Corr.          0.978        0.983        1.000     Higher
NLL                         10949.2194       2.4249    1611.6868      Lower
No description has been provided for this image

OOD data detection¶

In [23]:
def _ood_scores(std_id, std_ood):
    """Per-sample uncertainty score for OOD detection.

    Averages each sample's σ field over the grid (axis=1), stacks the
    in-distribution scores followed by the OOD scores, and max-normalizes
    so all methods share a [0, 1] scale.
    NOTE(review): assumes std arrays are (num_samples, num_points) total σ — confirm.
    """
    scores = np.concatenate((std_id.mean(axis=1), std_ood.mean(axis=1)), axis=0)
    return scores / np.max(scores)

# Uncertainty scores per method: ID samples first, then OOD samples.
hmc_ood_eval = _ood_scores(std1, std4)
mcd_ood_eval = _ood_scores(std2, std5)
la_ood_eval = _ood_scores(std3, std6)
# True labels for AUROC: 0 for ID, 1 for OOD.
oods = np.concatenate((np.zeros(std1.shape[0]), np.ones(std4.shape[0])), axis=0)
# Examine OOD data:
# Step 1: Generate uncertainty scores: *_ood_eval
# Step 2: Create true labels for AUROC (1 for OOD, 0 for ID): oods
# Step 3: Calculate AUROC and plot the ROC curve for each method
for scores, uqmethod in zip([hmc_ood_eval, mcd_ood_eval, la_ood_eval],
                            ['HMC', 'MC Dropout', 'Laplace Approximation']):
    auroc = roc_auc_score(oods, scores)
    fpr, tpr, _ = roc_curve(oods, scores)
    fig, ax = plt.subplots()
    ax.plot(fpr, tpr, color='darkorange', lw=2, label=f'ROC curve (AUROC = {auroc:.2f})')
    ax.plot([0, 1], [0, 1], color='navy', lw=2, linestyle='--')
    ax.set_xlim([0.0, 1.0])
    ax.set_ylim([0.0, 1.05])
    ax.set_xlabel('False Positive Rate')
    ax.set_ylabel('True Positive Rate')
    ax.set_title(f'Receiver Operating Characteristic (ROC) using {uqmethod}')
    ax.legend(loc='lower right')
    plt.show()
    print(f"AUROC for OOD detection: {auroc}")
No description has been provided for this image
AUROC for OOD detection: 0.7480249999999999
No description has been provided for this image
AUROC for OOD detection: 0.801425
No description has been provided for this image
AUROC for OOD detection: 0.75305